/*=======================================================================
Creator: Jingyuan Wang, jingyuanwang@u.northwestern.edu
Date created: 				08/23/2018
Date last modified: 		03/17/2018
Purpose: 
		Use QCEW data and SEDS energy price data to generate energy price
		
		Steps:
			I. National level energy prices:
				1. generate national level energy prices for the 8 types of energy
				   using weighted sum of state energy prices
			II. generate ASM electricity price for comparison
			III. Merge in MECS energy usage share 
				1. generate time-variant and time-invariant energy use share
				2. merge in
			IV. generate:
				1. electricity price
				2. fuel price using time-variant share & time-invariant share
				3. energy prices (elec+fuel) using time-variant share & time-invariant share
			V. save
==========================================================================*/

*************************************************************************
* 				PART I . Generate national energy prices of the 8 	    *
* 						 energy types = weighted average of state prices*
*************************************************************************
use "$energy_merged/QCEW_SEDS_MECS_CI_state_ind_year.dta", clear

*1. keep only naics6 level observations
keep if industry_level == 6  | NAICS_code == 31131 | NAICS_code == 31181


*2. keep only state level observations

* use state-level employment if state add up to 60% of US employment
preserve
	keep state_code state state_name industry_code NAICS_code sector  year total_annual_wages annual_avg_emplvl
	sort state_code 
	egen group = group(state_code)
	
	keep annual_avg_emplvl total_annual_wages year NAICS_code group
	reshape wide annual_avg_emplvl total_annual_wages, i(NAICS_code year) j(group)
	
	forvalues i = 1(1)52{
		replace total_annual_wages`i' = 0 if total_annual_wages`i' == .
		replace annual_avg_emplvl`i' = 0 if annual_avg_emplvl`i' == .
	}
	*
	
	gen wage_tot = 0
	gen employment_tot = 0
	forvalues i = 2(1)52{
		replace wage_tot = wage_tot + total_annual_wages`i' 
		replace employment_tot = employment_tot +  annual_avg_emplvl`i'
	}
	*
	
	gen rate_w = wage_tot/total_annual_wages1
	gen rate_e = employment_tot/annual_avg_emplvl1
	
	bysort NAICS_code: egen rate_w_tiv = mean(rate_w)
	bysort NAICS_code: egen rate_e_tiv = mean(rate_e)
	bysort NAICS_code: egen rate_e_min = min(rate_e)
	
	gen national = 0
	replace national = 1 if rate_e_tiv < 0.5 |  rate_e_min < 0.4
	keep national NAICS_code year annual_avg_emplvl1
	rename annual_avg_emplvl1 annual_avg_emplvl_us
	label var annual_avg_emplvl_us "industry-us-year. Annual average of monthly employment levels"
	tempfile nationalemployment
	save `nationalemployment.dta', replace
	
restore

merge m:1 NAICS_code year using `nationalemployment.dta'
drop _merge

keep if state != "US" | NAICS_code == 331411 | NAICS_code == 316212 | national == 1
* industry 331411 does not have state level employment
drop if NAICS_code == 316212 & state == "NY"
* industry 316212: 2009-2010 US + 2001 NY
drop if national == 1 & state != "US"

*2. weighted average and collapse to national level
* (1) elec
gen price_elec_weightedsum = price_elec * annual_avg_emplvl

* (2) 7 types of fuel
foreach v in  "RF" "DF" "NG" "HL" "PC" "CL" "PE" {
	gen price_`v'_weightedsum = price_`v' * annual_avg_emplvl
}
*

* (3) weighted sum
keep NAICS_code sector year naics_mecs fuelshare_ISnaics6level  ///
		price_*_weightedsum  annual_avg_emplvl national  annual_avg_emplvl_us
collapse (first) annual_avg_emplvl_us (sum) price_*_weightedsum  annual_avg_emplvl  ///
			, by(year NAICS_code sector naics_mecs fuelshare_ISnaics6level national)
			
foreach v in "elec" "RF" "DF" "NG" "HL" "PC" "CL" "PE" {
	gen price_`v' = price_`v'_weightedsum / annual_avg_emplvl
	label var price_`v' "price in $ per million Btu"
}
*
	
*3. save 
drop *weightedsum
keep NAICS_code sector year naics_mecs fuelshare_ISnaics6level  price_* national annual_avg_emplvl*

tempfile energyprice
save `energyprice.dta', replace


*************************************************************************
* 				PART III. Generate ASM electricity price				*
*************************************************************************		
preserve
	* 1. import data
	use "$ASM/ASM.dta", clear
	keep if level == 6 | naicsid_str == "31131" | naicsid_str == "31181"
	keep if code == "6-digit"

	*2. generate elctricity price
	gen price_elec_ASM = cst_elec/q_elec_pch
	
	*3. save 
	keep naicsid year naicsdisplaylabel price_elec_ASM
	rename naicsid NAICS_code
	rename naicsdisplaylabel industry_name
	drop industry_name

	label var price_elec_ASM "electricity price from ASM, $ per kWh"			

	tempfile ASMprice
	save `ASMprice.dta', replace
restore

*************************************************************************
* 				PART III. fuel share from MECS							*
*************************************************************************
preserve
	tempfile MECS_share
	
	do "$buildpath/code/subfunctions/gen_fuelshare_MECS.do"
	
	save `MECS_share.dta', replace	
	
	* save a list of available NAICS6 industries
	keep naics_mecs 
	keep if naics_mecs > 999
	duplicates drop
	tempfile mecscode
	save `mecscode.dta', replace

restore


*************************************************************************
* 				PART III. Generate electricity price					*
* 						  Generate fuel price							*
*************************************************************************

* 1. set up the panel:
do "$buildpath/code/subfunctions/genPanel_1990-2016.do" 

* generate contemporary code
gen NAICS_code = naics2007
replace NAICS_code = naics2012 if year>=2011

* 2. merge in energy price
merge m:1 NAICS_code year using `energyprice.dta'
drop if _merge == 2
* 339111 is 2002 NAICS code, it corresponds to 339114 or 334516 in 2007 code. The employment values of this industry is small. So I just drop it.
drop _merge

* 3. merge in fuel share
replace fuelshare_ISnaics6level = 0 if naics_mecs == 324122 | naics_mecs == 331110
replace naics_mecs = 324 if naics_mecs  == 324122
replace naics_mecs = 331 if naics_mecs  == 331110

merge m:1 naics_mecs year using `MECS_share.dta'
drop if _merge == 2
drop _merge


* 2. merge in ASM price
replace NAICS_code = naics2007 if year == 2011
merge m:1 NAICS_code year using `ASMprice.dta'
drop _merge
sort NAICS_code year		

* 3. [Time-variant] generate weighted average fuel price and energy price
gen price_fuel_TV = share_RF*price_RF + share_DF*price_DF + share_NG*price_NG ///
				+ share_HL*price_HL + share_CL*price_CL + share_PC*price_PC + share_PE*price_PE
* double check:
egen share_fuel = rowtotal(share_RF share_DF share_NG share_HL share_CL share_PC share_PE )
gen share_tot = share_fuel + share_elec
sum share_tot // great, all 1s
drop share_tot share_fuel

replace price_fuel_TV = price_fuel_TV / (1 - share_elec)
label var price_fuel_TV "fuel price [time-variant weighted], $ per million Btu"

gen price_TV = price_elec * share_elec + price_fuel_TV * (1-share_elec)
label var price_TV "energy price [time-variant weighted], $ per million Btu"

* 4. [Time-invariant] generate weighted average fuel price and energy price
gen price_fuel_TIV = share_RF_inv*price_RF + share_DF_inv*price_DF + share_NG_inv*price_NG ///
				+ share_HL_inv*price_HL + share_CL_inv*price_CL + share_PC_inv*price_PC + share_PE_inv*price_PE
* double check:
egen share_fuel_inv = rowtotal(share_RF_inv share_DF_inv share_NG_inv share_HL_inv share_CL_inv share_PC_inv share_PE_inv )
gen share_tot = share_fuel_inv + share_elec_inv
sum share_tot // great, all 1s
drop share_tot share_fuel_inv

replace price_fuel_TIV = price_fuel_TIV / (1 - share_elec_inv)
label var price_fuel_TIV "fuel price [time-invariant (2007) weighted], $ per million Btu"

gen price_TIV = price_elec * share_elec_inv + price_fuel_TIV * (1-share_elec_inv)
label var price_TIV "energy price [time-invariant (2007) weighted], $ per million Btu"


* 4' [2010 share] weighted average fuel price and energy price
gen price_fuel_2010 = share_RF_2010*price_RF + share_DF_2010*price_DF + share_NG_2010*price_NG ///
				+ share_HL_2010*price_HL + share_CL_2010*price_CL + share_PC_2010*price_PC + share_PE_2010*price_PE
* double check:
egen share_fuel_2010 = rowtotal(share_RF_2010 share_DF_2010 share_NG_2010 share_HL_2010 share_CL_2010 share_PC_2010 share_PE_2010 )
gen share_tot = share_fuel_2010 + share_elec_2010
sum share_tot // great, all 1s
drop share_tot share_fuel_2010

replace price_fuel_2010 = price_fuel_2010 / (1 - share_elec_2010)
label var price_fuel_2010 "fuel price [2010 fuel share weighted], $ per million Btu"

gen price_2010 = price_elec * share_elec_2010 + price_fuel_2010 * (1-share_elec_2010)
label var price_2010 "energy price [2010 fuel share weighted], $ per million Btu"

* 5 rename and rescale
label var share_elec "time-variant share of electricity usage over total energy usage in Btu. (Not in $)"
label var price_elec "electricity price in $ per million Btu"

replace price_elec_ASM = price_elec_ASM/3412.14 * 1000000
label var price_elec_ASM "electricity price from ASM, $ per million Btu"

rename fuelshare_ISnaics6level  energyshare_ISnaics6level
label var energyshare_ISnaics6level "Whether the share of different fuel types and elec are at NAICS6 level"

keep census_naics naics2007 naics2012 industryname2007 industryname2012 NAICS_code sector year naics_mecs energyshare_ISnaics6level price_TV price_TIV price_2010 share_elec share_NG price_elec_ASM price_fuel* price_elec annual_avg_emplvl*
order census_naics naics2007 naics2012 NAICS_code industryname2007 industryname2012 sector naics_mecs year share_elec share_NG energyshare_ISnaics6level price_TV price_TIV price_2010 price_fuel_TV price_fuel_TIV price_fuel_2010 price_elec price_elec_ASM
rename NAICS_code naics
label var naics "1990-2011: 2007 code; 2012+: 2012 code"
label var annual_avg_emplvl "aggregated state level annual average of monthly employment levels for a given year, from QCEW"
label var annual_avg_emplvl_us "us level annual average of monthly employment levels for a given year, from QCEW"

*************************************************************************
* 				PART IV. save											*
*************************************************************************

* 1. generate a price [time-invariant fuel share] at NAICS2012 level
local weight "annual_avg_emplvl"
preserve
	keep if year <= 2011
	keep naics2012 year price_TIV price_fuel_TIV price_TV price_fuel_TV price_elec `weight'
	replace price_TIV      		= price_TIV * `weight'
	replace price_fuel_TIV 		= price_fuel_TIV * `weight'
	replace price_TV      		= price_TV * `weight'
	replace price_fuel_TV 		= price_fuel_TV * `weight'
	replace price_elec     		= price_elec * `weight'
	
	collapse (sum) price_TIV price_fuel_TIV price_TV price_fuel_TV price_elec `weight', by(naics2012 year)
	replace price_TIV 			= price_TIV/`weight'
	replace price_fuel_TIV 		= price_fuel_TIV/`weight'
	replace price_TV 			= price_TV/`weight'
	replace price_fuel_TV 		= price_fuel_TV/`weight'
	replace price_elec 			= price_elec/`weight'
	
	
	rename price_TIV price_TIV_2012NAICS
	label var price_TIV_2012NAICS "energy price [time-invariant (2007) weighted] for NAICS2012 industries, $ per million Btu"
	rename price_fuel_TIV price_fuel_TIV_2012NAICS
	label var price_fuel_TIV_2012NAICS "fuel price [time-invariant (2007) weighted] for NAICS2012 industries, $ per million Btu"
	
	rename price_TV price_TV_2012NAICS
	label var price_TV_2012NAICS "energy price [time-variant weighted] for NAICS2012 industries, $ per million Btu"
	rename price_fuel_TV price_fuel_TV_2012NAICS
	label var price_fuel_TV_2012NAICS "fuel price [time-variant weighted] for NAICS2012 industries, $ per million Btu"
	
	rename price_elec price_elec_2012NAICS
	label var price_elec_2012NAICS "electricity price for NAICS2012 industries, $ per million Btu"
	
	rename `weight' `weight'_2012NAICS
	label var `weight'_2012NAICS "collapse annual_avg_emplvl to NAICS2012 level employment"
	
	tempfile pre2012price
	save `pre2012price.dta', replace
restore

merge m:1 naics2012 year using `pre2012price.dta'
drop _merge

replace price_TIV_2012NAICS = price_TIV if year >= 2012
replace price_fuel_TIV_2012NAICS = price_fuel_TIV if year >= 2012
replace price_TV_2012NAICS = price_TV if year >= 2012
replace price_fuel_TV_2012NAICS = price_fuel_TV if year >= 2012
replace price_elec_2012NAICS = price_elec if year >= 2012

keep census_naics naics2007 naics2012 naics industryname2007 industryname2012 sector naics_mecs year price_*_2012NAICS
rename *_2012NAICS *

save "$energy_price/energy_price_industry_year.dta", replace

